# -*- coding: utf-8 -*-
import copy
import math

from scrapy import FormRequest, Request
from scrapy.exceptions import IgnoreRequest

from zc_core.spiders.base import BaseSpider
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request

from jesgcc.rules import *
from jesgcc.utils.login import SeleniumLogin


class SkuSpider(BaseSpider):
    """Spider that crawls the j.esgcc.com.cn catalog index and the paged
    sku search listing for one batch.

    Flow:
      1. ``start_requests`` fires one catalog request and the first (page 0)
         sku-list search request.
      2. ``parse_sku_list`` on page 0 reads the total hit count and schedules
         the remaining pages.
    """
    name = 'sku'
    custom_settings = {
        'CONCURRENT_REQUESTS': 4,
        'DOWNLOAD_DELAY': 0.1,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 4,
        'CONCURRENT_REQUESTS_PER_IP': 4,
        'RETRY_TIMES': 5,
    }
    # Catalog (category tree) entry page.
    cat_index_url = "http://j.esgcc.com.cn/index/channelJump/5f7186aee4b0b6b3065b090e?grade=1"
    # Sku search endpoint (POST, form-encoded).
    sku_list_url = 'http://j.esgcc.com.cn/search/interface/searchByQuery'
    # Detail page url template; '{}' is filled with the sku id downstream.
    item_url = "http://j.esgcc.com.cn/product/{}.html/?grade=1"

    def __init__(self, batchNo=None, *args, **kwargs):
        super(SkuSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Page size of the search endpoint; also used to compute 'from' offsets.
        self.page_size = 40

    def _sku_page_request(self, page, cookies, referer):
        """Build the POST request for one page of the sku search listing.

        :param page: zero-based page index; the form 'from' offset is
            ``page * page_size``.
        :param cookies: cookie dict; deep-copied so Scrapy cannot mutate a
            shared instance across requests.
        :param referer: Referer header value (the site varies it between the
            first page and the paginated requests).
        """
        return FormRequest(
            url=self.sku_list_url,
            method="POST",
            meta={
                'reqType': 'sku',
                'page': page,
                'batchNo': self.batch_no,
                'itemUrl': self.item_url
            },
            headers={
                'Content-Type': 'application/x-www-form-urlencoded',
                'Referer': referer,
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
            },
            formdata={
                'searchType': '0',
                'from': str(page * self.page_size),
                'size': str(self.page_size),
                'query': '(zoneLevel:1)',
                'word': '',
                'heighlight': '',
                'sort': '_id|desc',
                'excludes': 'goodsOrg,_table_src,_insert_time,typeId,contractId,classPath',
                'type': '2'
            },
            cookies=copy.deepcopy(cookies),
            callback=self.parse_sku_list,
            errback=self.error_back,
        )

    def start_requests(self):
        # NOTE(review): Selenium login is bypassed with a hard-coded token;
        # re-enable SeleniumLogin().get_cookies() when the token expires.
        # cookies = SeleniumLogin().get_cookies()
        cookies = {'TOKEN': '3bbe4642-24fe-4ec5-a951-47edb55ae7a8'}
        if not cookies:
            self.logger.error('init cookie failed...')
            return
        self.logger.info('init cookie: %s', cookies)

        # Catalog tree request.
        yield Request(
            url=self.cat_index_url,
            meta={
                'reqType': 'catalog',
                'batchNo': self.batch_no
            },
            headers={
                'Host': 'j.esgcc.com.cn',
                'Connection': 'keep-alive',
                'Upgrade-Insecure-Requests': '1',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
                'Referer': 'http://j.esgcc.com.cn/search/goodsSearchZone',
                'Accept-Encoding': 'gzip, deflate',
                'Accept-Language': 'zh-CN,zh;q=0.9',
            },
            cookies=copy.deepcopy(cookies),
            callback=self.parse_catalog,
            errback=self.error_back,
        )

        # First page (page 0) of the sku listing; pagination is scheduled by
        # parse_sku_list once the total hit count is known.
        yield self._sku_page_request(
            page=0,
            cookies=cookies,
            referer='http://j.esgcc.com.cn/indexView',
        )

    def parse_catalog(self, response):
        """Parse the catalog tree page; currently log-only (see TODO)."""
        cats = parse_catalog(response)
        if cats:
            self.logger.info('分类: count[%s]' % len(cats))
            # TODO: the site's third-level catalog ids are currently broken;
            # re-enable once fixed, otherwise the catalog pool gets polluted. (Aug 5)
            # yield Box('catalog', self.batch_no, cats)

    def parse_sku_list(self, response):
        """Parse one page of sku search results; on page 0 also fan out the
        remaining page requests. Empty responses/pages are retried."""
        meta = response.meta
        page = meta.get('page')
        if not response.text:
            # Empty body: retry this exact request.
            self.logger.info('空响应重试: page=%s' % page)
            yield retry_request(response.request)
            return

        sku_list, item_list = parse_sku_item_list(response)
        if sku_list:
            self.logger.info('分页: page=%s, count=%s' % (page, len(sku_list)))
            yield Box('sku', self.batch_no, sku_list)
            yield Box('item', self.batch_no, item_list)
        else:
            self.logger.info('空页: page=%s' % page)

        total = parse_total_page(response)
        # Only page 0 schedules the rest, so each remaining page is
        # requested exactly once.
        if page == 0:
            if total >= len(sku_list):
                total_page = math.ceil(total / self.page_size)
                # Page 0 was already fetched, so the remaining zero-based
                # pages are 1 .. total_page-1 (the original code requested
                # one guaranteed-empty extra page).
                for idx in range(1, total_page):
                    yield self._sku_page_request(
                        page=idx,
                        cookies=response.request.cookies,
                        referer='http://j.esgcc.com.cn/search/goodsSearch?searchType=gcIdSearch&keyword=26,2601,2601001&grade=1',
                    )
            else:
                # Total smaller than what page 0 returned: inconsistent
                # response, retry the first page.
                self.logger.error('第一页无数据: page[%s]' % page)
                yield retry_request(response.request)

